Live Google Dataset Search Advanced
Live Google Dataset Search provides real-time data on the top 20 Google Dataset search engine results. These results are specific to the indicated keyword. You can specify other parameters optionally.
Live Google Dataset Search provides real-time data on the top 20 Google Dataset search engine results. These results are specific to the indicated keyword. You can specify other parameters optionally.
Instead of ‘login’ and ‘password’ use your credentials from https://app.dataforseo.com/api-dashboard
<?php
/**
 * Example: Live Google Dataset Search Advanced request.
 *
 * Sends one task to POST /v3/serp/google/dataset_search/live/advanced and
 * prints the decoded response. Any RestClientException is reported with its
 * HTTP code, error code, message and stack trace.
 */

// You can download this file from here https://cdn.dataforseo.com/v3/examples/php/php_RestClient.zip
require('RestClient.php');

$api_url = 'https://api.dataforseo.com/';

try {
    // Instead of 'login' and 'password' use your credentials from https://app.dataforseo.com/api-dashboard
    $client = new RestClient($api_url, null, 'login', 'password');
} catch (RestClientException $e) {
    echo "\n";
    print "HTTP code: {$e->getHttpCode()}\n";
    print "Error code: {$e->getCode()}\n";
    print "Message: {$e->getMessage()}\n";
    print $e->getTraceAsString();
    echo "\n";
    // Without a client there is nothing left to do.
    exit();
}

$post_array = array();

// You can set only one task at a time
$post_array[] = array(
    "keyword" => "water quality",
    "last_updated" => "1m",
    "file_formats" => [
        "archive",
        "image"
    ],
    "usage_rights" => "noncommercial",
    "is_free" => true,
    "topics" => [
        "natural_sciences",
        "geo"
    ]
);

try {
    // POST /v3/serp/google/dataset_search/live/advanced
    // in addition to 'google' and 'dataset_search' you can also set other search engine and type parameters
    // the full list of possible parameters is available in documentation
    $result = $client->post('/v3/serp/google/dataset_search/live/advanced', $post_array);
    print_r($result);
    // do something with post result
} catch (RestClientException $e) {
    echo "\n";
    print "HTTP code: {$e->getHttpCode()}\n";
    print "Error code: {$e->getCode()}\n";
    print "Message: {$e->getMessage()}\n";
    print $e->getTraceAsString();
    echo "\n";
}

$client = null;
?>
The above command returns JSON structured like this:
{ "version": "0.1.20221214", "status_code": 20000, "status_message": "Ok.", "time": "2.0795 sec.", "cost": 0.002, "tasks_count": 1, "tasks_error": 0, "tasks": [ { "id": "01161741-1535-0139-0000-5eb67a6e8212", "status_code": 20000, "status_message": "Ok.", "time": "2.0246 sec.", "cost": 0.002, "result_count": 1, "path": [ "v3", "serp", "google", "dataset_search", "live", "advanced" ], "data": { "api": "serp", "function": "live", "se": "google", "se_type": "dataset_search", "keyword": "water quality", "last_updated": "1m", "file_formats": [ "archive", "image" ], "usage_rights": "noncommercial", "is_free": true, "topics": [ "natural_sciences", "geo" ], "device": "desktop", "os": "windows" }, "result": [ { "keyword": "water quality", "se_domain": "datasetsearch.research.google.com", "language_code": "en", "check_url": "https://datasetsearch.research.google.com/search?query=water%20quality&hl=en&filters=WyJbXCJ1cGRhdGVkX2RhdGVcIixbXCIxbVwiXV0iLCJbXCJmaWxlX2Zvcm1hdF9jbGFzc1wiLFtcIjdcIixcIjVcIl1dIiwiW1wibGljZW5zZV9jbGFzc1wiLFtcIm5vbmNvbW1lcmNpYWxcIl1dIiwiW1wiaXNfYWNjZXNzaWJsZV9mb3JfZnJlZVwiLFtdXSIsIltcImZpZWxkX29mX3N0dWR5XCIsW1wibmF0dXJhbF9zY2llbmNlc1wiLFwiZ2VvXCJdXSJd", "datetime": "2023-01-16 15:41:03 +00:00", "spell": null, "item_types": [ "dataset" ], "se_results_count": 11, "items_count": 11, "items": [ { "type": "dataset", "rank_group": 1, "rank_absolute": 1, "position": "left", "xpath": null, "dataset_id": "L2cvMTFwYzA4cmhqeg==", "title": "Logan River Observatory: South Logan Benson Canal at Benson Irrigation Company Flume, 2300 North 600 West Aquatic Site (SLB_600W_CNL) Quality Controlled Data", "image_url": null, "scholarly_citations_count": null, "links": [ { "type": "link_element", "title": "hydroshare.org", "description": null, "url": "http://www.hydroshare.org/", "domain": "www.hydroshare.org" }, { "type": "link_element", "title": "dataone.org", "description": null, "url": "http://search.dataone.org/", "domain": "search.dataone.org" } ], "dataset_providers": 
[ { "type": "dataset_providers_element", "title": "HydroShare", "url": null, "domain": null } ], "formats": [ { "type": "formats_element", "format": "zip", "size": null } ], "authors": [ { "type": "authors_element", "name": "Logan River Observatory", "url": null, "domain": null } ], "licenses": [ { "type": "licenses_element", "title": "Attribution 4.0 (CC BY 4.0)", "url": "https://creativecommons.org/licenses/by/4.0/", "domain": "creativecommons.org" } ], "updated_date": "2022-12-27 02:00:00 +00:00", "area_covered": [ "2300 North 600 West", "South Logan Benson Canal at Benson Irrigation Company Flume", "Logan", "North America", "Rocky Mountains" ], "period_covered": null, "dataset_description": { "text": "This dataset contains quality control level 1 (QC1) data for all of the variables measured for the aquatic site on the South Logan Benson Canal at Benson Irrigation Company Flume, 2300 North 600 West (SLB_600W_CNL). Each file contains all available QC1 data for a specific variable. Files will be updated as new data become available, but no more than once daily. These data have passed QA/QC procedures such as sensor calibration and visual inspection and removal of obvious errors. These data are approved by Technicians as the best available version of the data. See published script for correction steps specific to this data series. Each file header contains detailed metadata for site information, variable and method information, source information, and qualifiers referenced in the data. \nThis site is currently operated as part of the Logan River Observatory.\n", "links": null } }, { "type": "dataset", "rank_group": 2, "rank_absolute": 2, "position": "left", "xpath": null, "dataset_id": "L2cvMTFuMDQ3X3B6aA==", "title": "Lake Simcoe Monitoring", "image_url": null, "scholarly_citations_count": 31, "links": [ { "type": "link_element", "title": "canada.ca", "description": null, "url": "http://open.canada.ca/", "domain": "open.canada.ca" }, { "type": "link_element", "title": "arctic-sdi.org", "description": null, "url": "http://catalogue.arctic-sdi.org/", "domain": "catalogue.arctic-sdi.org" } ], "dataset_providers": [ { "type": "dataset_providers_element", "title": "Government of Ontario", "url": null, "domain": null } ], "formats": [ { "type": "formats_element", "format": "pdf", "size": null }, { "type": "formats_element", "format": "html", "size": null }, { "type": "formats_element", "format": "zip", "size": null } ], "authors": null, "licenses": [ { "type": "licenses_element", "title": "Open Government Licence - Canada 2.0", "url": "https://open.canada.ca/en/open-government-licence-canada", "domain": "open.canada.ca" } ], "updated_date": "2022-12-30 02:00:00 +00:00", "area_covered": null, "period_covered": { "start_date": "1980-01-01 03:00:00 +00:00", "end_date": "2021-12-31 02:00:00 +00:00", "displayed_date": "Jan 1, 1980 - Dec 31, 2021" }, "dataset_description": { "text": "The Lake Simcoe lake monitoring program provides measurements of chemical and physical water quality limits such as total phosphorus, nitrogen, chlorophyll a, pH, alkalinity, conductivity, dissolved organic and inorganic carbon, silica, other ions, water transparency, temperature and dissolved oxygen. Samples are collected biweekly during the spring, summer and fall. *[pH]: potential of hydrogen\n", "links": null } } ] } ] } ] }
All POST data should be sent in the JSON format (UTF-8 encoding). When setting a task, you should send all task parameters in the task array of the generic POST array. You can send up to 2000 API calls per minute; each Live SERP API call can contain only one task.
Below you will find a detailed description of the fields you can use for setting a task.
Description of the fields for setting a task:
Field name | Type | Description |
---|---|---|
keyword |
string | keyword required field you can specify up to 700 symbols in the keyword field; all %## will be decoded (plus symbol ‘+’ will be decoded to a space character); if you need to use the “%” symbol for your keyword, please specify it as “%25”; if you need to use the “+” symbol for your keyword, please specify it as “%2B”;
|
language_name |
string | full name of search engine language optional field if you use this field, you don’t need to specify language_code only value: English |
language_code |
string | search engine language code optional field if you don’t specify language_name if you use this field, you don’t need to specify language_name only value: en |
device |
string | device type optional field only value: desktop |
os |
string | device operating system optional field choose from the following values: windows , macos default value: windows |
depth |
integer | parsing depth optional field number of results in SERP default value: 20 max value: 700 Note: your account will be billed per each SERP containing up to 20 results; thus, setting a depth above 20 may result in additional charges if the search engine returns more than 20 results; if the specified depth is higher than the number of results in the response, the difference will be refunded automatically to your account balance |
last_updated |
string | last time the dataset was updated optional field possible values: 1m , 1y , 3y
|
file_formats |
array | file formats of the dataset optional field possible values: other , archive , text , image , document , tabular
|
usage_rights |
string | usage rights of the dataset optional field possible values: commercial , noncommercial
|
is_free |
boolean | indicates whether displayed datasets are free optional field possible values: true , false
|
topics |
array | dataset topics optional field possible values: humanities , social_sciences , life_sciences , agriculture , natural_sciences , geo , computer , architecture_and_urban_planning , engineering
|
tag |
string | user-defined task identifier optional field the character limit is 255 you can use this parameter to identify the task and match it with the result you will find the specified tag value in the data object of the response |
As a response of the API server, you will receive JSON-encoded data containing a tasks
array with the information specific to the set tasks.
Description of the fields in the results array:
Field name | Type | Description |
---|---|---|
version |
string | the current version of the API |
status_code |
integer | general status code you can find the full list of the response codes here Note: we strongly recommend designing a necessary system for handling related exceptional or error conditions |
status_message |
string | general informational message you can find the full list of general informational messages here |
time |
string | execution time, seconds |
cost |
float | total tasks cost, USD |
tasks_count |
integer | the number of tasks in the tasks array |
tasks_error |
integer | the number of tasks in the tasks array returned with an error |
tasks |
array | array of tasks |
id |
string | task identifier unique task identifier in our system in the UUID format |
status_code |
integer | status code of the task generated by DataForSEO; can be within the following range: 10000-60000 you can find the full list of the response codes here |
status_message |
string | informational message of the task you can find the full list of general informational messages here |
time |
string | execution time, seconds |
cost |
float | cost of the task, USD |
result_count |
integer | number of elements in the result array |
path |
array | URL path |
data |
object | contains the same parameters that you specified in the POST request |
result |
array | array of results |
keyword |
string | keyword received in a POST array the keyword is returned with decoded %## (plus symbol ‘+’ will be decoded to a space character) |
se_domain |
string | search engine domain in a POST array |
language_code |
string | language code in a POST array |
check_url |
string | direct URL to search engine results you can use it to make sure that we provided accurate results |
datetime |
string | date and time when the result was received in the UTC format: “yyyy-mm-dd hh:mm:ss +00:00” example: 2019-11-15 12:57:46 +00:00 |
spell |
object | autocorrection of the search engine if the search engine provided results for a keyword that was corrected, we will specify the keyword corrected by the search engine and the type of autocorrection |
item_types |
array | types of search results in SERP contains types of search results (items) found in SERP. possible item type: dataset
|
se_results_count |
integer | total number of results in SERP |
items_count |
integer | the number of results returned in the items array |
items |
array | elements of search results found in SERP |
type |
string | type of element = ‘dataset’ |
rank_group |
integer | group rank in SERP position within a group of elements with identical type values; positions of elements with different type values are omitted from rank_group |
rank_absolute |
integer | absolute rank in SERP absolute position among all the elements in SERP |
position |
string | the alignment of the element in SERP can take the following values: left , right |
xpath |
string | the XPath of the element |
dataset_id |
string | ID of the dataset |
title |
string | title of the result in SERP |
image_url |
string | URL of the image the URL leading to the image on the original resource or DataForSEO storage (in case the original source is not available) |
scholarly_citations_count |
integer | count of articles that refer to the dataset |
links |
array | sitelinks the links shown below some of Google Dataset’s search results if there are none, equals null |
type |
string | type of element = ‘link_element’ |
title |
string | title of the result in SERP |
description |
string | description of the results element in SERP |
url |
string | sitelink URL |
domain |
string | domain in SERP |
dataset_providers |
array | the list of institutions that provided the dataset |
type |
string | type of element = ‘dataset_providers_element’ |
title |
string | name of the dataset provider |
url |
string | site URL of the dataset provider |
domain |
string | site domain of the dataset provider |
formats |
array | the list of file formats of the dataset |
type |
string | type of element = ‘formats_element’ |
format |
string | type of file format of the dataset for example: zip , html , csv
|
size |
string | file size in bytes |
authors |
array | the list of authors of the dataset |
type |
string | type of element = ‘authors_element’ |
name |
string | name of the dataset author |
url |
string | author’s link URL |
domain |
string | author’s link domain |
licenses |
array | the list of licenses issued to the dataset |
type |
string | type of element = ‘licenses_element’ |
title |
string | name of the license |
url |
string | license URL |
domain |
string | license page domain |
updated_date |
string | date and time when the result was last updated in the UTC format: “yyyy-mm-dd hh:mm:ss +00:00” example: 2022-11-27 02:00:00 +00:00 |
area_covered |
array | the list of areas covered in the dataset for example: Africa , Global
|
period_covered |
object | period covered in the dataset |
start_date |
string | date and time when the period starts in the UTC format: “yyyy-mm-dd hh:mm:ss +00:00” example: 2020-03-02 02:00:00 +00:00 |
end_date |
string | date and time when the period ends in the UTC format: “yyyy-mm-dd hh:mm:ss +00:00” example: 2022-12-09 02:00:00 +00:00 |
displayed_date |
string | period displayed in SERP example: Mar 2, 2020 - Dec 9, 2022 |
dataset_description |
object | description of the dataset |
text |
string | text of the description |
links |
array | links featured in the ‘dataset_description’ |
type |
string | type of element = ‘link_element’ |
title |
string | link anchor text |
description |
string | description of the results element in SERP |
url |
string | URL link |
domain |
string | domain in SERP |